
def find_identical_pairs(dict_file):
    """Collect source words whose listed translation is the same string.

    Each non-blank line of *dict_file* must hold exactly two
    whitespace-separated tokens: a source word followed by a target word.

    Args:
        dict_file: Path to the bilingual dictionary text file.

    Returns:
        A ``(identical, sources)`` tuple of sets. ``sources`` holds every
        distinct source word seen; ``identical`` holds the source words that
        appear on at least one line where source and target are equal.

    Raises:
        ValueError: If a non-blank line does not contain exactly two tokens.
    """
    sources = set()
    identical = set()
    # Explicit encoding so the result does not depend on the platform locale.
    # NOTE(review): assumes the dictionary file is UTF-8 -- confirm.
    with open(dict_file, 'r', encoding='utf-8') as f:
        for lineno, line in enumerate(f, start=1):
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing empty line) carry no entry.
                continue
            if len(fields) != 2:
                raise ValueError(
                    f"{dict_file}:{lineno}: expected 2 fields, "
                    f"got {len(fields)}"
                )
            src, trg = fields
            sources.add(src)
            if src == trg:
                identical.add(src)
    return identical, sources


if __name__ == "__main__":
    dict_file = 'data/crosslingual/dictionaries/vi-en.0-5000.txt'
    cnt, tot = find_identical_pairs(dict_file)

    # Guard the ratio so a dictionary with no entries does not divide by zero.
    ratio = len(cnt) / len(tot) if tot else 0.0
    print(len(cnt), len(tot), ratio)
    print(cnt)